In this notebook, we try to analyse what makes a character succeed. We first compute a metric that evaluates how famous a character is. Then we produce all kinds of plots for the website: general analysis, creativity analysis, diversity, longevity...
# Import libraries
import pandas as pd
import numpy as np
%matplotlib inline
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import math
import re
import string
import pickle
from collections import Counter
import plotly
import plotly.graph_objects as go
import chart_studio
import chart_studio.tools as tls
import chart_studio.plotly as py
from IPython.display import HTML
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pyperclip
import os
Load the dataset
# Load the per-character tables of both publishers from their pickle files.
# NOTE(review): unpickling executes arbitrary code — only load files from a trusted source.
marvel_pers = pd.read_pickle("data_pickle/marvel_pers_final.txt")
dc_pers = pd.read_pickle("data_pickle/dc_pers_final.txt")
def clean_years(l, min_year=1930):
    """Return the years of *l* that are strictly greater than *min_year*.

    Parameters
    ----------
    l : iterable of numbers, None or NaN
        Years attached to one character. A missing value (``None`` or a
        float NaN, which is how pandas may represent an empty cell) is
        treated as "no year known".
    min_year : number, optional
        Exclusive lower bound; years less than or equal to it are discarded.
        Defaults to 1930, the cut-off used throughout the notebook.

    Returns
    -------
    list
        The kept years, in their original order (duplicates preserved).
    """
    # NaN is the only value that differs from itself.
    if l is None or (isinstance(l, float) and l != l):
        return []
    return [year for year in l if year > min_year]
# Filter the raw year lists of both publishers through clean_years.
for frame in (dc_pers, marvel_pers):
    frame['years'] = frame['years'].apply(clean_years)
dc_pers['years']
# First year of appearance: the smallest remaining year, None when the list is empty.
for frame in (dc_pers, marvel_pers):
    frame['First_apparition'] = frame['years'].apply(
        lambda kept_years: None if len(kept_years) == 0 else min(kept_years)
    )
Find the longevity of each character, i.e. the number of distinct years in which it appears
# Longevity = number of distinct years in a character's year list.
for frame in (marvel_pers, dc_pers):
    frame['Longevity'] = frame['years'].apply(lambda years: len(set(years)))
Let's remove the characters that do not appear in any comic at all
# Remove, in place, every character without a single recorded year.
for frame in (marvel_pers, dc_pers):
    never_seen = frame.index[frame['Longevity'] == 0]
    frame.drop(index=never_seen, inplace=True)
marvel_pers.head(10)
Discover the most famous characters, both good and bad
# Marvel characters with the most appearances.
# NOTE(review): the variable name says 20 but head(10) keeps only 10 rows — confirm the intended size.
appear_20 = marvel_pers.sort_values('Number_of_apparitions',ascending=False).head(10)[['Real Name','Current Alias']]
appear_20
# Marvel characters with the highest longevity (same 20-vs-10 naming caveat).
long_20 = marvel_pers.sort_values('Longevity',ascending=False).head(10)[['Real Name','Current Alias']]
long_20
# Characters present in both rankings: pd.merge defaults to an inner join on the shared columns.
pd.merge(appear_20,long_20)
We can see that only 3 characters are in both top lists (number of appearances and longevity). We therefore need to define the "famousity" of a character: a single score that combines the number of appearances and the longevity. A first idea is the harmonic mean of the two; the formula actually used is chosen below, after looking at the distributions.
# Average longevity (number of distinct years of appearance) per publisher.
marvel_pers['Longevity'].mean()
dc_pers['Longevity'].mean()
To determine the function, we need to look at the distributions of the longevity and of the number of appearances.
def _plot_share_panel(ax, marvel_share, dc_share, marvel_mean, dc_mean,
                      title, xlabel, xlim, box_title):
    """Draw one log-log panel comparing the Marvel and DC value distributions.

    ax            -- matplotlib axes to draw on
    marvel_share  -- Series: value -> share of Marvel characters having it
    dc_share      -- same for DC
    marvel_mean   -- mean of the raw Marvel values, shown in the text box
    dc_mean       -- mean of the raw DC values, shown in the text box
    title, xlabel -- axes title and x label
    xlim          -- (min, max) limits of the x axis
    box_title     -- heading of the text box
    """
    ax.scatter(marvel_share.index, marvel_share.values,
               facecolors='none', edgecolors='red', alpha=0.95)
    ax.scatter(dc_share.index, dc_share.values,
               facecolors='none', edgecolors='blue', alpha=0.95)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    # The series come from value_counts(normalize=True): they are shares, not counts.
    ax.set_ylabel('Proportion of characters')
    ax.legend(['Marvel', 'DC Comic'])
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim(xlim)
    ax.set_ylim((2e-5, 1))
    textstr = '\n'.join((
        box_title + ' \n' + r'$\mathrm{Marvel\ Mean}=%.1f$' % (marvel_mean,),
        r'$\mathrm{DC\ Mean}=%.1f$' % (dc_mean,)))
    # These are matplotlib.patch.Patch properties.
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    # Text box anchored at (0.5, 0.88) in axes coordinates.
    ax.text(0.5, 0.88, textstr, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', bbox=props)


fig, axs = plt.subplots(1, 2, figsize=(12, 8))

# Left panel: share of characters for each number of appearances.
marvel_app = marvel_pers['Number_of_apparitions'].value_counts(normalize=True)
dc_app = dc_pers['Number_of_apparitions'].value_counts(normalize=True)
_plot_share_panel(axs[0], marvel_app, dc_app,
                  marvel_pers['Number_of_apparitions'].mean(),
                  dc_pers['Number_of_apparitions'].mean(),
                  title='Nb of appearance distribution (Marvel vs DC)',
                  xlabel='Nb of Appearance',
                  xlim=(1, 10000),
                  box_title='Nb appearance:')

# Right panel: share of characters for each longevity.
marvel_long = marvel_pers['Longevity'].value_counts(normalize=True)
dc_long = dc_pers['Longevity'].value_counts(normalize=True)
_plot_share_panel(axs[1], marvel_long, dc_long,
                  marvel_pers['Longevity'].mean(),
                  dc_pers['Longevity'].mean(),
                  title='Longevity distribution (Marvel vs DC)',
                  xlabel='Longevity',
                  xlim=(1, 90),
                  box_title='Longevity:')

# Save through the figure object so the right figure is written whatever the current one is.
fig.savefig('img/longevity/histogram_appareace_longevity.png')
fig.show()
We see that both distributions roughly follow power laws. So for the famousity, we take the mean of the normalized logarithms, i.e.: $$\text{Famous} = \frac{\frac{\log(\text{longevity})}{\max(\log(\text{longevity}))}+\frac{\log(\text{Appearance})}{\max(\log(\text{Appearance}))}}{2}$$
# Famous = mean of the two log-values, each divided by its maximum within the publisher.
for frame in (marvel_pers, dc_pers):
    log_appearance = np.log(frame['Number_of_apparitions'])
    log_longevity = np.log(frame['Longevity'])
    frame['Famous'] = (log_appearance / max(log_appearance) + log_longevity / max(log_longevity)) / 2
# Top 10 Marvel characters by 'Famous' score (highest first)
marvel_pers.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
# Top 10 DC characters by 'Famous' score (highest first)
dc_pers.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
It seems to work, since we find the top of the two lists plus the characters that are in both lists. Let's check whether this technique works for both good and bad characters, and see if we recognize them
# 10 most famous GOOD characters at Marvel
marvel_pers[marvel_pers['Behavior']=='Good'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
# 10 most famous BAD characters at Marvel
marvel_pers[marvel_pers['Behavior']=='Bad'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
# 10 most famous GOOD characters at DC Comics
dc_pers[dc_pers['Behavior']=='Good'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
# 10 most famous BAD characters at DC Comics (the filter below selects Behavior == 'Bad')
dc_pers[dc_pers['Behavior']=='Bad'].sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
Compute the longevity, appearance and famousity scores out of 100
# Express the three metrics on a 0-100 scale, separately for each publisher.
for frame in (marvel_pers, dc_pers):
    log_appearance = np.log(frame['Number_of_apparitions'])
    log_longevity = np.log(frame['Longevity'])
    frame['Score appearance'] = log_appearance / max(log_appearance) * 100
    frame['Score longevity'] = log_longevity / max(log_longevity) * 100
    frame['Score Famous'] = 100 * frame['Famous']
dc_pers
def _most_famous_match(df, category, name):
    """Return the row of *df* where ``df[category] == name`` with the highest 'Score Famous', or None."""
    matches = df[df[category] == name]
    if matches.empty:
        return None
    return matches.sort_values(by='Score Famous', ascending=False).iloc[0]


def plot_longevity(category, name):
    """Plot the yearly number of appearances of a character and save the figure.

    The character is looked up in ``marvel_pers`` and ``dc_pers``; when several
    rows match, the one with the highest 'Score Famous' is used. One bar chart
    is drawn per publisher in which the character exists (Marvel in red, DC in
    blue), each with its own title and score box, so the data of one publisher
    no longer hides the other's. The figure is written to
    ``img/longevity/pers_<name>.png``.

    Parameters
    ----------
    category : str
        Column used for the lookup (e.g. 'Current Alias').
    name : str
        Value searched in that column.

    Raises
    ------
    ValueError
        If the character is found in neither frame.
    KeyError
        If *category* is not a column of the frames.
    """
    publishers = (
        ('Marvel', marvel_pers, 'red'),
        ('DC', dc_pers, 'blue'),
    )
    found = []
    for label, frame, color in publishers:
        row = _most_famous_match(frame, category, name)
        if row is not None:
            found.append((label, color, row))
    if not found:
        raise ValueError('No character with %s == %r in the Marvel or DC data' % (category, name))

    # One row of axes per publisher; squeeze=False keeps a 2-D array even for one panel.
    fig, axes = plt.subplots(len(found), 1, figsize=(14, 6 * len(found)), squeeze=False)
    for ax, (label, color, row) in zip(axes[:, 0], found):
        # Number of recorded appearances per year.
        activity = pd.Series(row['years']).value_counts()
        ax.bar(x=activity.index, height=activity, width=1, color=color)
        ax.set_xlabel('Year')
        ax.set_ylabel('NB of appearance')
        ax.set_xlim(1930, 2020)
        ax.set_title('"' + name + '" appearance in ' + label + ' Comics')
        textstr = '\n'.join((
            'Score: \n' + r'$\mathrm{Appearance}=%.1f$' % (row['Score appearance'],),
            r'$\mathrm{Longevity}=%.1f$' % (row['Score longevity'],),
            r'$\mathrm{Famous}=%.1f$' % (row['Score Famous'],)))
        # These are matplotlib.patch.Patch properties.
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        # Text box in the upper-left corner, in axes coordinates.
        ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=14,
                verticalalignment='top', bbox=props)
    fig.savefig('img/longevity/pers_' + name + '.png')
    fig.show()
# Example: plot the character whose current alias is 'Batman'.
plot_longevity('Current Alias','Batman')
# List every DC row sharing that alias, most famous first.
dc_pers[dc_pers['Current Alias']=='Batman'].sort_values(by='Score Famous',ascending=False)
Apparently, there are many Batman entries, which is why the longevity score of Batman is so bad. Should we merge them?
NOTE: It is important to know that a character can appear many times in the dataset if, for example, the character appeared in another version of the Earth, or appeared with different characteristics. Hence, for this part, we will combine all the characters that share the same alias.
def merge_alias(df):
    """Merge all the rows sharing a 'Current Alias' into one row and rescore it.

    For each alias the descriptive columns come from the row with the highest
    'Score Famous'; 'years' is the concatenation of the year lists,
    'Number_of_apparitions' the sum and 'First_apparition' the minimum over
    the merged rows. The alias 'Unknown' is discarded when present.
    'Longevity', 'Famous' and the three 'Score ...' columns are then
    recomputed on the merged data.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Current Alias', 'Score Famous', 'years' (lists),
        'Number_of_apparitions' and 'First_apparition'.

    Returns
    -------
    pandas.DataFrame
        One row per alias, with 'Current Alias' back as a regular column.
    """
    # Sort once; groupby keeps the row order inside each group, so first()
    # returns the values of the most famous row of every alias.
    grouped = df.sort_values(by='Score Famous', ascending=False).groupby('Current Alias')
    new_df = grouped.first()
    # Flatten the lists in one pass instead of repeated list addition.
    new_df['years'] = grouped['years'].apply(
        lambda year_lists: [year for years in year_lists for year in years]
    )
    new_df['Number_of_apparitions'] = grouped['Number_of_apparitions'].sum()
    new_df['First_apparition'] = grouped['First_apparition'].min()
    # 'Unknown' gathers unrelated characters; it may be absent from the data.
    new_df = new_df.drop('Unknown', errors='ignore')
    new_df = new_df.reset_index()
    new_df['Longevity'] = new_df['years'].apply(lambda years: len(set(years)))

    log_appearance = np.log(new_df['Number_of_apparitions'])
    log_longevity = np.log(new_df['Longevity'])
    appearance_ratio = log_appearance / max(log_appearance)
    longevity_ratio = log_longevity / max(log_longevity)
    new_df['Famous'] = (appearance_ratio + longevity_ratio) / 2
    new_df['Score appearance'] = appearance_ratio * 100
    new_df['Score longevity'] = longevity_ratio * 100
    new_df['Score Famous'] = 100 * new_df['Famous']
    return new_df
# One row per alias for each publisher, with the scores recomputed by merge_alias.
marvel_alias = merge_alias(marvel_pers)
dc_alias = merge_alias(dc_pers)
def _most_famous_match(df, category, name):
    """Return the row of *df* where ``df[category] == name`` with the highest 'Score Famous', or None."""
    matches = df[df[category] == name]
    if matches.empty:
        return None
    return matches.sort_values(by='Score Famous', ascending=False).iloc[0]


def plot_longevity(category, name):
    """Plot the yearly number of appearances of a merged alias and save the figure.

    The character is looked up in ``marvel_alias`` and ``dc_alias``; when
    several rows match, the one with the highest 'Score Famous' is used. One
    bar chart is drawn per publisher in which the character exists (Marvel in
    red, DC in blue), each with its own title and score box, so the data of
    one publisher no longer hides the other's. The figure is written to
    ``img/longevity/alias_<name>.png``.

    Parameters
    ----------
    category : str
        Column used for the lookup (e.g. 'Current Alias').
    name : str
        Value searched in that column.

    Raises
    ------
    ValueError
        If the character is found in neither frame.
    KeyError
        If *category* is not a column of the frames.
    """
    publishers = (
        ('Marvel', marvel_alias, 'red'),
        ('DC', dc_alias, 'blue'),
    )
    found = []
    for label, frame, color in publishers:
        row = _most_famous_match(frame, category, name)
        if row is not None:
            found.append((label, color, row))
    if not found:
        raise ValueError('No character with %s == %r in the Marvel or DC data' % (category, name))

    # One row of axes per publisher; squeeze=False keeps a 2-D array even for one panel.
    fig, axes = plt.subplots(len(found), 1, figsize=(14, 6 * len(found)), squeeze=False)
    for ax, (label, color, row) in zip(axes[:, 0], found):
        # Number of recorded appearances per year.
        activity = pd.Series(row['years']).value_counts()
        ax.bar(x=activity.index, height=activity, width=1, color=color)
        ax.set_xlabel('Year')
        ax.set_ylabel('NB of appearance')
        ax.set_xlim(1930, 2020)
        ax.set_title('"' + name + '" appearance in ' + label + ' Comics')
        textstr = '\n'.join((
            'Score: \n' + r'$\mathrm{Appearance}=%.1f$' % (row['Score appearance'],),
            r'$\mathrm{Longevity}=%.1f$' % (row['Score longevity'],),
            r'$\mathrm{Famous}=%.1f$' % (row['Score Famous'],)))
        # These are matplotlib.patch.Patch properties.
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        # Text box in the upper-left corner, in axes coordinates.
        ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=14,
                verticalalignment='top', bbox=props)
    fig.savefig('img/longevity/alias_' + name + '.png')
    fig.show()
# Same check as before, now on the alias-merged frames.
plot_longevity('Current Alias','Batman')
# Rows of the merged DC frame whose alias is 'Batman', most famous first.
dc_alias[dc_alias['Current Alias']=='Batman'].sort_values(by='Score Famous',ascending=False)
The problem seems to be resolved. Let's check whether the most famous characters are still the same
# Top 10 merged Marvel aliases by 'Famous' score
marvel_alias.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
# Top 10 merged DC aliases by 'Famous' score
dc_alias.sort_values('Famous',ascending=False).head(10)[['Real Name','Current Alias']]
We see that the order is different, but the characters are roughly the same
Just check the distribution of the score
# Histogram (100 bins) of the strictly positive 'Score Famous' values of the merged Marvel aliases.
marvel_alias[marvel_alias['Score Famous']>0]['Score Famous'].hist(bins=100)
We are going to merge the two dataframes and analyse them together in order to determine the factors of famousity of a character. For this, we keep every occurrence of each alias, so that we can determine which attributes give a character more chance of lasting longer
# Add a tag recording which publisher each row comes from
marvel_pers['Comic'] = 'Marvel'
dc_pers['Comic'] = 'DC'
marvel_pers.columns
dc_pers.columns
# Columns kept for the combined analysis (they must exist in both frames).
attribute = ['URL', 'Real Name', 'Current Alias', 'Comic', 'Identity', 'Citizenship', 'Marital Status',\
'Occupation', 'Education', 'Gender', 'Height in float', 'Weight in float', 'Eyes', 'Hair',\
'Place of Birth','Behavior',\
'Number_of_apparitions', 'years', 'First_apparition', 'Longevity', 'Score appearance',\
'Score longevity', 'Score Famous']
# Stack both publishers row-wise. concat keeps each frame's own index labels.
# NOTE(review): if the two frames share index labels, `pers` has a non-unique index and
# any later label-based drop/loc affects every row carrying that label — confirm.
pers = pd.concat([marvel_pers[attribute],dc_pers[attribute]],axis=0)
# Shorter, uniform column names for the rest of the analysis.
pers.rename(inplace=True, columns={'Height in float':'Height', 'Weight in float':'Weight',\
'Number_of_apparitions':'Nb appearance','years':'Years',\
'First_apparition':'First appearance', })
We will separate the dataset into 3 groups and analyse their attributes. The split is done as follows
# Number of characters per distinct score, and size of each fame tier
# (low: <= 20, medium: ]20, 60], high: > 60).
pers_score = pers['Score Famous'].value_counts()
pers_high = len(pers[pers['Score Famous'] > 60])
pers_med = len(pers[(pers['Score Famous'] > 20) & (pers['Score Famous'] <= 60)])
pers_low = len(pers[pers['Score Famous'] <= 20])
total = len(pers['Score Famous'])

fig, axs = plt.subplots(1, 1, figsize=(14, 6))
axs.bar(pers_score.index, pers_score.values, color='purple')
# Shaded background for each tier.
for span_start, span_end, span_color in ((60, 100, 'green'), (20.1, 60, 'orange'), (0, 19.9, 'brown')):
    axs.axvspan(span_start, span_end, alpha=0.2, color=span_color)
axs.set_xlabel('Score Famous')
axs.set_ylabel('Number of character')
axs.set_title('Repartition of the character in the group of celebrity')
axs.set_yscale('log')
# Annotate every tier with its size and its share of all characters.
tier_annotations = (
    (10, "Low Famousness", pers_low),
    (40, "Medium Famousness", pers_med),
    (80, "High Famousness", pers_high),
)
for x_pos, tier_label, tier_size in tier_annotations:
    s = tier_label + "\n , N = %d\n(%.2f%%)" % (tier_size, tier_size / total * 100)
    axs.text(x_pos, 1e4, s, ha='center', va='top')
plt.savefig('img/longevity/repartition.png')
(This part is adapted from Pilou's work.)
# Split the characters into the three fame tiers by 'Score Famous'.
fame_score = pers['Score Famous']
pers_high = pers[fame_score > 60]
pers_med = pers[(fame_score > 20) & (fame_score <= 60)]
pers_low = pers[fame_score <= 20]
# Share of each marital status inside every fame tier. The shares are normalized by the
# number of characters with a known status in the tier, so tiers of different sizes compare fairly.
# 'Unknown' is removed with a boolean mask: dropping by index label would also remove
# unrelated rows whenever the index of the tier frame is not unique.
marital_status_high = pers_high.loc[pers_high["Marital Status"] != 'Unknown', "Marital Status"]\
    .value_counts(normalize=True).to_frame('high_count')
marital_status_low = pers_low.loc[pers_low["Marital Status"] != 'Unknown', "Marital Status"]\
    .value_counts(normalize=True).to_frame('low_count')
marital_status_med = pers_med.loc[pers_med["Marital Status"] != 'Unknown', "Marital Status"]\
    .value_counts(normalize=True).to_frame('med_count')
dfList = [marital_status_high, marital_status_low, marital_status_med]
# Outer join: a status missing from one tier gets a share of 0.
marital_status = pd.concat(dfList, axis=1, join='outer', sort=True).fillna(0)
marital_status = marital_status.sort_values(by='high_count', ascending=False)

fig, axs = plt.subplots(1, 1, figsize=(14, 8))
X = np.arange(len(marital_status))
# Three bars per status, centred on X.
axs.bar(X + 0.25, height=marital_status['high_count'], width=0.25, color='green', label='high', alpha=1)
axs.bar(X, height=marital_status['med_count'], width=0.25, color='orange', label='medium', alpha=1)
axs.bar(X - 0.25, height=marital_status['low_count'], width=0.25, color='brown', label='low', alpha=1)
axs.set_title('Marital Status with respect to the famousness')
axs.set_xlabel('Marital Status')
# Fix the tick positions before labelling them, so that every label sits under its own group of bars.
axs.set_xticks(X)
axs.set_xticklabels(list(marital_status.index))
axs.set_ylabel('Normalized Count (log)')
axs.set_yscale('log')
plt.legend()
plt.savefig('img/longevity/marital_status.png')
# Share of each gender inside every fame tier. The shares are normalized by the number of
# characters with a known gender in the tier, so tiers of different sizes compare fairly.
# 'Unknown' is removed with a boolean mask: dropping by index label would also remove
# unrelated rows whenever the index of the tier frame is not unique.
gender_high = pers_high.loc[pers_high["Gender"] != 'Unknown', "Gender"]\
    .value_counts(normalize=True).to_frame('high_count')
gender_low = pers_low.loc[pers_low["Gender"] != 'Unknown', "Gender"]\
    .value_counts(normalize=True).to_frame('low_count')
gender_med = pers_med.loc[pers_med["Gender"] != 'Unknown', "Gender"]\
    .value_counts(normalize=True).to_frame('med_count')
dfList = [gender_high, gender_low, gender_med]
# Outer join: a gender missing from one tier gets a share of 0.
gender = pd.concat(dfList, axis=1, join='outer', sort=True).fillna(0)
gender = gender.sort_values(by='high_count', ascending=False)

fig, axs = plt.subplots(1, 1, figsize=(14, 8))
X = np.arange(len(gender))
# Three bars per gender, centred on X.
axs.bar(X + 0.25, height=gender['high_count'], width=0.25, color='green', label='high')
axs.bar(X, height=gender['med_count'], width=0.25, color='orange', label='medium')
axs.bar(X - 0.25, height=gender['low_count'], width=0.25, color='brown', label='low')
axs.set_title('Gender with respect to the famousness')
axs.set_xlabel('Gender')
# Fix the tick positions before labelling them, so that every label sits under its own group of bars.
axs.set_xticks(X)
axs.set_xticklabels(list(gender.index))
axs.set_ylabel('Normalized Count (log)')
axs.set_yscale('log')
plt.legend()
plt.savefig('img/longevity/gender.png')
Transgender characters were introduced quite recently, which is probably why they rank low in famousness
# Heights per fame tier: keep the known values below 500.
# `x != np.nan` is always True, so missing values are tested with notna() instead.
height_high = pers_high.loc[pers_high["Height"].notna() & (pers_high["Height"] < 500), 'Height']
height_low = pers_low.loc[pers_low["Height"].notna() & (pers_low["Height"] < 500), 'Height']
height_med = pers_med.loc[pers_med["Height"].notna() & (pers_med["Height"] < 500), 'Height']

fig, axs = plt.subplots(1, 1, figsize=(14, 8))
# density=True scales each histogram to a total area of 1, so the tiers stay comparable
# although they contain very different numbers of characters.
kwargs = dict(alpha=0.2, bins=100, density=True, stacked=True)
axs.hist(height_high, **kwargs, color='green', label='high')
axs.hist(height_med, **kwargs, color='orange', label='medium')
axs.hist(height_low, **kwargs, color='brown', label='low')
axs.set_title('Probability Histogram of the Height')
axs.set_xlabel('Height in cm')
axs.set_ylabel('Probability')
plt.legend()
plt.savefig('img/longevity/height.png')
# Weights per fame tier: keep the known values below 500.
# `x != np.nan` is always True, so missing values are tested with notna() instead.
weight_high = pers_high.loc[pers_high["Weight"].notna() & (pers_high["Weight"] < 500), 'Weight']
weight_low = pers_low.loc[pers_low["Weight"].notna() & (pers_low["Weight"] < 500), 'Weight']
weight_med = pers_med.loc[pers_med["Weight"].notna() & (pers_med["Weight"] < 500), 'Weight']

fig, axs = plt.subplots(1, 1, figsize=(14, 8))
# density=True scales each histogram to a total area of 1, so the tiers stay comparable
# although they contain very different numbers of characters.
kwargs = dict(alpha=0.3, bins=100, density=True, stacked=True)
axs.hist(weight_low, **kwargs, color='brown', label='low')
axs.hist(weight_med, **kwargs, color='orange', label='medium')
axs.hist(weight_high, **kwargs, color='green', label='high')
axs.set_title('Probability Histogram of the Weight')
axs.set_xlabel('Weight in kg')
axs.set_ylabel('Probability')
plt.legend()
plt.savefig('img/longevity/weight.png')
There is not much difference between the groups. The two peaks correspond to the good characters and the bad characters
# Share of each eye colour among the HIGH-fame characters
# (the variable names mention good/bad/neutral, but the frames used are the fame tiers)
eyes_good_top10 = pd.DataFrame(pers_high['Eyes'].value_counts(normalize=True))
eyes_good_top10 = eyes_good_top10.reset_index()
eyes_good_top10.columns = ['Eyes', 'High']
# Share of each eye colour among the LOW-fame characters
eyes_bad_top10 = pd.DataFrame(pers_low['Eyes'].value_counts(normalize=True))
eyes_bad_top10 = eyes_bad_top10.reset_index()
eyes_bad_top10.columns = ['Eyes', 'Low']
# Share of each eye colour among the MEDIUM-fame characters
eyes_neutral_top10 = pd.DataFrame(pers_med['Eyes'].value_counts(normalize=True))
eyes_neutral_top10 = eyes_neutral_top10.reset_index()
eyes_neutral_top10.columns = ['Eyes', 'Medium']
# Outer join of the three tables on the eye colour; a colour missing from a tier gets 0
#eyes_neutral_top10.merge(eyes_good_top10,eyes_bad_top10, on='Eyes', how='outer')
dfList = [eyes_good_top10, eyes_bad_top10, eyes_neutral_top10]
dfs = [df.set_index('Eyes') for df in dfList]
eyes = pd.concat(dfs, axis=1, join='outer', sort=True) \
.fillna(0)
# Transposed view: one row per tier, one column per eye colour
eyes.T
from math import pi
def make_spider(df):
    """Draw one radar (spider) chart per row of *df* and save them in one image.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per group to plot (the row label is used as the chart title)
        and one numeric column per axis of the radar.

    The figure is written to ``img/longevity/eye.png``.
    """
    from math import ceil, pi

    # Initialize the figure.
    my_dpi = 100
    plt.figure(figsize=(1000 / my_dpi, 1000 / my_dpi), dpi=my_dpi)

    n_charts = len(df.index)
    # One colour per chart.
    my_palette = plt.get_cmap("Set2", n_charts)

    # Every column is one axis of the radar.
    categories = list(df)
    N = len(categories)

    # Angle of each axis; the first angle is repeated to close the polygon.
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    # Common radial scale for all the charts.
    r_max = round(np.max(df.max().values), 1)
    r_ticks = np.round(np.linspace(0 + 0.05, r_max - 0.05, 3), 3)

    # Two charts per line, at least the original 2x2 grid.
    grid_rows = max(2, ceil(n_charts / 2))

    for row in range(n_charts):
        ax = plt.subplot(grid_rows, 2, row + 1, polar=True)

        # Put the first axis on top and go clockwise.
        ax.set_theta_offset(pi / 2)
        ax.set_theta_direction(-1)

        # One axis per variable, with its label.
        plt.xticks(angles[:-1], categories, color='grey', size=8)

        # Radial labels.
        ax.set_rlabel_position(0)
        plt.yticks(r_ticks, [str(tick) for tick in r_ticks], color="grey", size=7)
        plt.ylim(0, r_max)

        # Values of this row, with the first one repeated to match `angles`.
        values = df.iloc[row].values.flatten().tolist()
        values += values[:1]
        ax.plot(angles, values, color=my_palette(row), linewidth=2, linestyle='solid')
        ax.fill(angles, values, color=my_palette(row), alpha=0.4)

        plt.title(df.index[row], size=11, color=my_palette(row), y=1.07)

    plt.savefig('img/longevity/eye.png')
#make_spider(eyes.T)
# Checkpoint both frames; the context managers make sure the files are flushed and closed.
with open('data_pickle/marvel_pers_final_2', 'wb') as handle:
    pickle.dump(marvel_pers, handle)
with open('data_pickle/dc_pers_final_2', 'wb') as handle:
    pickle.dump(dc_pers, handle)
# Start from the beginning: reload the checkpoint.
# NOTE(review): unpickling executes arbitrary code — only load files from a trusted source.
marvel_pers = pd.read_pickle("data_pickle/marvel_pers_final_2")
dc_pers = pd.read_pickle("data_pickle/dc_pers_final_2")
######### THIS PLOT IS AN EXAMPLE OF A SIMPLE PLOT, IT WILL NOT BE USED #######################
# Per year, share of each known marital status among the characters active that year
# (one row per year, one column per status).
marvel_line = (
    marvel_pers[(marvel_pers['Marital Status'] != 'Unknown')]
    .explode('years')
    .dropna(subset=['years'])
    .groupby('years')['Marital Status']
    .value_counts(normalize=True)
    .unstack(level=1)
)
dc_line = (
    dc_pers[(dc_pers['Marital Status'] != 'Unknown')]
    .explode('years')
    .dropna(subset=['years'])
    .groupby('years')['Marital Status']
    .value_counts(normalize=True)
    .unstack(level=1)
)
fig = make_subplots(rows=1, cols=2, subplot_titles=("Marvel", "DC Comics"))
# Statuses drawn for both publishers: the Marvel trace carries the legend entry.
status_colors = (
    ('Single', 'blue'),
    ('Married', 'red'),
    ('Widowed', 'green'),
    ('Divorced', 'orange'),
    ('Separated', 'purple'),
    ('Engaged', 'cyan'),
)
for status, color in status_colors:
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[status], mode='lines', name=status,
                             line=dict(color=color, width=2)), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[status], mode='lines', name=status,
                             line=dict(color=color, width=2), showlegend=False), row=1, col=2)
# 'Remarried' is only drawn for DC, so this trace shows its own legend entry.
color = 'yellow'
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Remarried'], mode='lines', name='Remarried',
                         line=dict(color=color, width=2), showlegend=True), row=1, col=2)
for subplot_col in (1, 2):
    fig.update_xaxes(title_text="Categories", row=1, col=subplot_col)
for subplot_col in (1, 2):
    fig.update_yaxes(title_text="Proportion", row=1, col=subplot_col)
fig.update_layout(barmode='group',
                  title="Marital Status",
                  font=dict(family='Komika Hand',
                            size=10,
                            color="#7f7f7f"))
fig.show()
dc_pers['Citizenship'].value_counts().head(10)
marvel_pers['Citizenship'].value_counts().head(10)
############################################################################################################################
## This plot is the generalization of the previous plot. It is written by hand because it is the first
## complicated plot made with Plotly; the other plots are more optimized.
#######################################################################################################################################
fig = make_subplots(rows=1, cols=2, subplot_titles=("Marvel", "DC Comics"))
# One row per (character, year): explode the year lists and keep one row per URL and year.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years','URL'])
# Remove the year 2020 with a boolean mask. explode() repeats the index label of the source row,
# so dropping by index label would delete EVERY year of any character that has a 2020 row.
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_explode[dc_explode['years'] != 2020]

######### 0: Number of characters: 2 traces ########
marvel_line = marvel_explode['years'].value_counts().sort_index()
dc_line = dc_explode['years'].value_counts().sort_index()
fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line, mode='lines', name='Marvel',
                         line=dict(color="#990000", width=3), visible=False), row=1, col=1)
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line, mode='lines', name='DC Comics',
                         line=dict(color="#0F4C81", width=3), visible=False), row=1, col=2)

########## 1: MARITAL STATUS: 13 traces ########
# Per year, share of each known marital status (the frames are already exploded).
marvel_line = marvel_explode[(marvel_explode['Marital Status'] != 'Unknown')]\
    .groupby('years')['Marital Status'].value_counts(normalize=True)
dc_line = dc_explode[(dc_explode['Marital Status'] != 'Unknown')]\
    .groupby('years')['Marital Status'].value_counts(normalize=True)
marvel_line = marvel_line.unstack(level=1)
dc_line = dc_line.unstack(level=1)
# Statuses drawn for both publishers; the two traces of a status share a legend group
# and only the Marvel trace carries the legend entry.
status_colors = (
    ('Single', 'blue'),
    ('Married', 'red'),
    ('Widowed', 'green'),
    ('Divorced', 'orange'),
    ('Separated', 'purple'),
    ('Engaged', 'cyan'),
)
for status, color in status_colors:
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[status], mode='lines', name=status,
                             line=dict(color=color, width=2), visible=False, legendgroup=color),
                  row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[status], mode='lines', name=status,
                             line=dict(color=color, width=2), showlegend=False, visible=False,
                             legendgroup=color),
                  row=1, col=2)
# 'Remarried' is only drawn for DC, so this trace shows its own legend entry.
color = 'yellow'
fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line['Remarried'], mode='lines', name='Remarried',
                         line=dict(color=color, width=2), showlegend=True, visible=False,
                         legendgroup=color),
              row=1, col=2)
fig.update_xaxes(title_text="Year", row=1, col=1)
fig.update_xaxes(title_text="Year", row=1, col=2)
################## 2: GENDER: 10 traces #######################
# Yearly share of each gender (rows labelled 'Unknown' excluded), one column
# per gender after unstacking.
marvel_line = marvel_explode[marvel_explode['Gender'] != 'Unknown'].groupby('years')['Gender'].value_counts(normalize=True).unstack(level=1)
dc_line = dc_explode[dc_explode['Gender'] != 'Unknown'].groupby('years')['Gender'].value_counts(normalize=True).unstack(level=1)

# (colour / legend group, legend name, Marvel column, DC column or None).
# The DC data uses the column 'Genderless' for what is shown as 'Agender';
# 'Genderfluid' and 'Gestalt' are drawn for Marvel only.
_gender_traces = (
    ('blue', 'Male', 'Male', 'Male'),
    ('magenta', 'Female', 'Female', 'Female'),
    ('grey', 'Agender', 'Agender', 'Genderless'),
    ('orange', 'Genderfluid', 'Genderfluid', None),
    ('brown', 'Gestalt', 'Gestalt', None),
    ('cyan', 'Transgender', 'Transgender', 'Transgender'),
)
for color, _legend, _marvel_col, _dc_col in _gender_traces:
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[_marvel_col], mode='lines', name=_legend, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    if _dc_col is not None:
        fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[_dc_col], mode='lines', name=_legend, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
##################### 3: Behavior: 6 traces #########################
# Yearly share of each behaviour value, one column per value.
marvel_line = marvel_explode.groupby('years')['Behavior'].value_counts(normalize=True).unstack(level=1)
dc_line = dc_explode.groupby('years')['Behavior'].value_counts(normalize=True).unstack(level=1)

# (colour / legend group, data column, legend name): the 'Bad' column is
# displayed under the name 'Evil'.
_behaviors = (
    ('green', 'Good', 'Good'),
    ('orange', 'Neutral', 'Neutral'),
    ('black', 'Bad', 'Evil'),
)
for color, _column, _legend in _behaviors:
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[_column], mode='lines', name=_legend, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[_column], mode='lines', name=_legend, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
##################### 4: Citizenship: 12 traces #######################
# Six citizenships are plotted: American, British, German, Chinese, Japanese
# and Russian. Values are raw yearly counts (not normalised).
marvel_line = marvel_explode.groupby('years')['Citizenship'].value_counts().unstack(level=1)
dc_line = dc_explode.groupby('years')['Citizenship'].value_counts().unstack(level=1)

_citizenship_colors = {
    'American': 'blue',
    'British': 'blueviolet',
    'German': 'black',
    'Chinese': 'navy',
    'Japanese': 'brown',
    'Russian': 'red',
}
for category, color in _citizenship_colors.items():
    label = category
    # Marvel trace owns the legend entry; the DC twin shares its legend group.
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category], mode='lines', name=label, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category], mode='lines', name=label, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
##################### 5: Occupation: 12 traces #########################
# Six occupations are plotted: Student, Criminal, Scientist, Adventurer,
# Mercenary and Soldier. Values are raw yearly counts.
attribut = 'Occupation'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts().unstack(level=1)
dc_line = dc_explode.groupby('years')[attribut].value_counts().unstack(level=1)

_occupations = (
    ('blue', 'Student'),
    ('red', 'Criminal'),
    ('green', 'Scientist'),
    ('magenta', 'Adventurer'),
    ('cyan', 'Mercenary'),
    ('orange', 'Soldier'),
)
for color, category in _occupations:
    label = category
    # Left subplot: Marvel (with legend entry). Right subplot: DC (legend hidden).
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category], mode='lines', name=label, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category], mode='lines', name=label, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
##################### 6. Height: 6 traces #######################
# Yearly median and quartiles of 'Height in float', ignoring rows without a
# height. The frames are already one row per year, so they are not exploded
# again; filter and group-by are built once per publisher and reused for the
# three statistics.
_marvel_height = marvel_explode.dropna(subset=['Height in float']).groupby('years')['Height in float']
median_marvel = _marvel_height.median()
low_quar_marvel = _marvel_height.quantile(0.25)
high_quar_marvel = _marvel_height.quantile(0.75)
_dc_height = dc_explode.dropna(subset=['Height in float']).groupby('years')['Height in float']
median_dc = _dc_height.median()
low_quar_dc = _dc_height.quantile(0.25)
high_quar_dc = _dc_height.quantile(0.75)

# Trace order matters: fill='tonexty' on the high quartile shades down to the
# trace added just before it (the low quartile); the median is drawn on top.
fig.add_trace(go.Scatter(x=low_quar_marvel.index, y=low_quar_marvel, fill=None, mode='lines', line_color='#990000', name='Low quartile', visible=False), row=1, col=1)
fig.add_trace(go.Scatter(x=high_quar_marvel.index, y=high_quar_marvel, fill='tonexty', mode='lines', line_color='#990000', name='High quartile', visible=False), row=1, col=1)
fig.add_trace(go.Scatter(x=median_marvel.index, y=median_marvel, mode='lines', line_color='whitesmoke', name='Median', visible=False), row=1, col=1)
fig.add_trace(go.Scatter(x=low_quar_dc.index, y=low_quar_dc, fill=None, mode='lines', line_color='#0F4C81', name='Low quartile', showlegend=True, visible=False), row=1, col=2)
fig.add_trace(go.Scatter(x=high_quar_dc.index, y=high_quar_dc, fill='tonexty', mode='lines', line_color='#0F4C81', name='High quartile', showlegend=True, visible=False), row=1, col=2)
fig.add_trace(go.Scatter(x=median_dc.index, y=median_dc, mode='lines', line_color='whitesmoke', name='Median', showlegend=False, visible=False), row=1, col=2)
##################### 7. Weight: 6 traces #######################
# Yearly median and quartiles of 'Weight in float', ignoring rows without a
# weight. The frames are already one row per year, so the extra
# `.explode('years')` calls were dropped; filter and group-by are built once
# per publisher and reused for the three statistics.
_marvel_weight = marvel_explode.dropna(subset=['Weight in float']).groupby('years')['Weight in float']
median_marvel = _marvel_weight.median()
low_quar_marvel = _marvel_weight.quantile(0.25)
high_quar_marvel = _marvel_weight.quantile(0.75)
_dc_weight = dc_explode.dropna(subset=['Weight in float']).groupby('years')['Weight in float']
median_dc = _dc_weight.median()
low_quar_dc = _dc_weight.quantile(0.25)
high_quar_dc = _dc_weight.quantile(0.75)

# Trace order matters: fill='tonexty' on the high quartile shades down to the
# trace added just before it (the low quartile); the median is drawn on top.
fig.add_trace(go.Scatter(x=low_quar_marvel.index, y=low_quar_marvel, fill=None, mode='lines', line_color='#990000', name='Low quartile', visible=False, legendgroup='low'), row=1, col=1)
fig.add_trace(go.Scatter(x=high_quar_marvel.index, y=high_quar_marvel, fill='tonexty', mode='lines', line_color='#990000', name='High quartile', visible=False, legendgroup='high'), row=1, col=1)
fig.add_trace(go.Scatter(x=median_marvel.index, y=median_marvel, mode='lines', line_color='whitesmoke', name='Median', visible=False, legendgroup='med'), row=1, col=1)
fig.add_trace(go.Scatter(x=low_quar_dc.index, y=low_quar_dc, fill=None, mode='lines', line_color='#0F4C81', name='Low quartile', showlegend=True, visible=False, legendgroup='low'), row=1, col=2)
fig.add_trace(go.Scatter(x=high_quar_dc.index, y=high_quar_dc, fill='tonexty', mode='lines', line_color='#0F4C81', name='High quartile', showlegend=True, visible=False, legendgroup='high'), row=1, col=2)
fig.add_trace(go.Scatter(x=median_dc.index, y=median_dc, mode='lines', line_color='whitesmoke', name='Median', showlegend=False, visible=False, legendgroup='med'), row=1, col=2)
##################### 8: EYES: 12 traces ############
# Six eye colours are plotted: Brown, Blue, Black, Green, Red and White.
# Values are raw yearly counts.
attribut = 'Eyes'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts().unstack(level=1)
dc_line = dc_explode.groupby('years')[attribut].value_counts().unstack(level=1)

# Each line is drawn in the colour it represents; the colour is also the
# legend group shared by the Marvel trace and its DC twin.
for color, category in (
        ('brown', 'Brown'),
        ('blue', 'Blue'),
        ('black', 'Black'),
        ('green', 'Green'),
        ('red', 'Red'),
        ('white', 'White')):
    label = category
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category], mode='lines', name=label, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category], mode='lines', name=label, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
##################### 9: Hair: 12 traces ############
# Six hair categories are plotted: Brown, Bald, Black, Blond, Red and White.
# Values are raw yearly counts.
attribut = 'Hair'
marvel_line = marvel_explode.groupby('years')[attribut].value_counts().unstack(level=1)
dc_line = dc_explode.groupby('years')[attribut].value_counts().unstack(level=1)

# Hair category -> line colour (also used as the legend group).
_hair_colors = {
    'Brown': 'brown',
    'Bald': 'pink',
    'Black': 'black',
    'Blond': 'yellow',
    'Red': 'red',
    'White': 'white',
}
for category, color in _hair_colors.items():
    label = category
    fig.add_trace(go.Scatter(x=marvel_line.index, y=marvel_line[category], mode='lines', name=label, line=dict(color=color, width=2), visible=False, legendgroup=color), row=1, col=1)
    fig.add_trace(go.Scatter(x=dc_line.index, y=dc_line[category], mode='lines', name=label, line=dict(color=color, width=2), showlegend=False, visible=False, legendgroup=color), row=1, col=2)
# Label the x axis of both subplots. The y-axis titles are not set here:
# they are drawn as annotations further down so they can change with the
# selected category.
for _col in (1, 2):
    fig.update_xaxes(title_text="Year", row=1, col=_col)

# Figure title and global font.
_title = dict(
    text="General analysis",
    font={'family': 'Komika Hand', 'color': '#7f7f7f', 'size': 20},
    x=0,
    xanchor='left',
    y=0.95,
    yanchor='top',
)
_base_font = dict(family='Komika Hand', size=10, color="#7f7f7f")
fig.update_layout(barmode='group', title=_title, font=_base_font)

# Every trace was added hidden; show the first two (the "Number" pair) so the
# figure opens on that view.
for _trace in fig.data[:2]:
    _trace.visible = True
#### Buttons ####
# One boolean mask per category button. Each mask has one entry per trace and
# is True only for the traces of its own category. The sizes below are the
# number of traces added per category, in the order they were added to `fig`.
_trace_groups = (
    ('number', 2),
    ('marital', 13),
    ('gender', 10),
    ('behavior', 6),
    ('citizen', 12),
    ('occupation', 12),
    ('height', 6),
    ('weight', 6),
    ('eyes', 12),
    ('hair', 12),
)


def _visibility_mask(selected):
    """Return the per-trace mask that shows only the `selected` group."""
    mask = []
    for _group, _size in _trace_groups:
        mask.extend([_group == selected] * _size)
    return mask


visible_number = _visibility_mask('number')
visible_marital = _visibility_mask('marital')
visible_gender = _visibility_mask('gender')
visible_behavior = _visibility_mask('behavior')
visible_citizen = _visibility_mask('citizen')
visible_occupation = _visibility_mask('occupation')
visible_height = _visibility_mask('height')
visible_weight = _visibility_mask('weight')
visible_eyes = _visibility_mask('eyes')
visible_hair = _visibility_mask('hair')
# Y-axis definitions swapped in by the lin/log scale buttons: `*1` is for the
# first y axis, `*2` for the second one, which is declared free-floating at
# horizontal position 0.55.
_second_axis = dict(anchor='free', position=0.55)
lin1 = go.layout.YAxis(visible=True, type='linear')
lin2 = go.layout.YAxis(visible=True, type='linear', **_second_axis)
log1 = go.layout.YAxis(visible=True, type='log')
log2 = go.layout.YAxis(visible=True, type='log', **_second_axis)
# Y-axis titles, drawn as rotated paper-referenced annotations (one per
# subplot) so that the category buttons can swap them.
def _y_axis_titles(text, right_x=0.493):
    """Return the annotation pair (left, right subplot) showing `text`."""
    return [
        dict(x=_x,
             y=0.5,
             showarrow=False,
             text=text,
             font=dict(size=13),
             textangle=-90,
             xref="paper",
             yref="paper")
        for _x in (-0.07, right_x)
    ]


prop = _y_axis_titles("Proportion")
yHeight = _y_axis_titles("Height [cm]")
# The right-hand weight title sits at 0.49 rather than 0.493.
yWeight = _y_axis_titles("Weight [kg]", right_x=0.49)
numb = _y_axis_titles("Number")
# Initial y-axis titles: the figure opens on the "Number" view, so both
# subplots are titled "Number" (left title at x=-0.07, right one at x=0.493).
fig.update_layout(annotations=[
    go.layout.Annotation(x=_x,
                         y=0.5,
                         showarrow=False,
                         text="Number",
                         font=dict(size=13),
                         textangle=-90,
                         xref="paper",
                         yref="paper")
    for _x in (-0.07, 0.493)
])
# Attach the two menus.
# Category menu: each button applies its visibility mask to the traces and
# swaps the y-axis title annotations. (button label, mask, annotations):
_category_buttons = (
    ("Number", visible_number, numb),
    ("Marital Status", visible_marital, prop),
    ("Gender", visible_gender, prop),
    ("Behavior", visible_behavior, prop),
    ("Citizenship", visible_citizen, numb),
    ("Occupation", visible_occupation, numb),
    ("Height", visible_height, yHeight),
    ("Weight", visible_weight, yWeight),
    ("Eyes", visible_eyes, numb),
    ("Hair", visible_hair, numb),
)
_category_menu = go.layout.Updatemenu(
    active=0,
    pad={"r": 10, "t": 10},
    x=-0.22,
    y=1.15,
    xanchor='left',
    yanchor='top',
    buttons=[
        dict(label=_text,
             method="update",
             args=[{"visible": _mask},
                   {"annotations": _titles}])
        for _text, _mask, _titles in _category_buttons
    ],
)
# Scale menu: relayout both y axes to linear or logarithmic.
_scale_menu = go.layout.Updatemenu(
    active=0,
    x=1,
    y=1.2,
    pad={"r": 10, "t": 10},
    xanchor='right',
    yanchor='top',
    buttons=[
        dict(label='Lin-scale',
             method='relayout',
             args=[{'yaxis': lin1,
                    'yaxis2': lin2}]),
        dict(label='Log-scale',
             method='relayout',
             args=[{'yaxis': log1,
                    'yaxis2': log2}]),
    ],
)
fig.update_layout(updatemenus=[_category_menu, _scale_menu])
# Export the figure.
# Optional Chart Studio upload, currently disabled. Keep credentials out of
# the notebook: the API key that used to be written here has been replaced by
# a placeholder.
#chart_studio.tools.set_credentials_file(username='<username>', api_key='<api key>')
#url = py.plot(fig, filename='general_caract_line.html', auto_open=False,)
#print(url)
# Pass the printed url to tls.get_embed to obtain the snippet for the website.
#tls.get_embed(str(url)) #in '' put the printed url
#print('Save graph to clipboard')
#os.system("echo '%s' | pbcopy" % str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
# Render the figure as an HTML <div> (plotly.js itself not embedded) and put
# it on the clipboard, then display the figure in the notebook.
_figure_div = plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')
pyperclip.copy(str(_figure_div))
fig.show()
category = 'Marital Status'
# Grouped bar chart of the distribution of `category` for Marvel and DC, with
# one pair of bars-traces per year and a slider to pick the year.
fig = go.Figure()
years = range(1930, 2020)

# One row per (character, year): explode the list of years, drop rows without
# a year and keep a single row per (year, URL) pair.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
# Remove the 2020 rows with a boolean mask. `explode` repeats the original
# index label on every row of a character, so the previous label-based
# `drop(index=...)` also removed all other years of characters seen in 2020.
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_explode[dc_explode['years'] != 2020]

# Rows whose category is the literal 'Unknown' are left out of the statistics.
_marvel_known = marvel_explode[marvel_explode[category] != 'Unknown']
_dc_known = dc_explode[dc_explode[category] != 'Unknown']

# (trace name, tag shown in the labels, data, bar colour); Marvel first, so
# that the traces of year number i are fig.data[2*i] and fig.data[2*i + 1].
_publishers = (
    ('Marvel', 'Marvel', _marvel_known, "#990000"),
    ('DC Comic', 'DC', _dc_known, "#0F4C81"),
)
for year in years:
    for _trace_name, _tag, _known, _bar_color in _publishers:
        _values = _known.loc[_known['years'] == year, category]
        _counts = _values.value_counts()
        _shares = _values.value_counts(normalize=True)
        # `.1%` is the d3-format spelling of "percentage with one decimal";
        # the former `%.1f` is printf syntax, which d3-format does not accept.
        fig.add_bar(name=_trace_name, x=_shares.index, y=_shares,
                    text=_counts,
                    texttemplate='<b>' + _tag + ':</b> <br>%{label}: <br>%{text:s} <br>(%{y:.1%})',
                    textposition='auto', textangle=0,
                    hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>' + _tag + ':</b> <br><i>%{x}</i>: %{text:s} (%{y:.1%})<extra></extra>',
                    hovertext=_counts, meta=[category, year],
                    visible=False, marker=dict(color=_bar_color))

# Open on the last year of the range (derived from `years`, not hard-coded).
_last_step = len(years) - 1
fig.data[2 * _last_step].visible = True
fig.data[2 * _last_step + 1].visible = True

# Slider: one step per year, showing only that year's two traces.
_n_traces = len(fig.data)
steps = []
for i, year in enumerate(years):
    _mask = [False] * _n_traces
    _mask[2 * i] = True
    _mask[2 * i + 1] = True
    steps.append(dict(method="restyle", args=["visible", _mask], label=str(year)))
sliders = [dict(
    active=_last_step,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]
fig.update_layout(sliders=sliders)

fig.update_layout(barmode='group',
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))

# Axis definitions for the lin/log buttons. Only lin1/log1 are used by this
# single-panel figure; lin2/log2 are still assigned so the notebook-global
# names keep the same values as before.
lin1 = go.layout.YAxis(visible=True, type='linear')
lin2 = go.layout.YAxis(visible=True, type='linear', anchor='free', position=0.55)
log1 = go.layout.YAxis(visible=True, type='log')
log2 = go.layout.YAxis(visible=True, type='log', anchor='free', position=0.55)

# Y-axis title, drawn as a rotated annotation left of the plot.
ylabel = [go.layout.Annotation(x=-0.07,
                               y=0.5,
                               showarrow=False,
                               text="Proportion",
                               font=dict(size=12),
                               textangle=-90,
                               xref="paper",
                               yref="paper")]
fig.update_layout(annotations=ylabel)

# Lin/log menu; the figure starts in log scale, hence active=1.
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            active=1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                            'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                            'annotations': ylabel}]),
            ])
    ])
fig.show()

# Export the figure.
# Optional Chart Studio upload, currently disabled. Keep credentials out of
# the notebook: the API key that used to be written here has been replaced by
# a placeholder.
#chart_studio.tools.set_credentials_file(username='<username>', api_key='<api key>')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Pass the printed url to tls.get_embed to obtain the snippet for the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
category = 'Gender'
# Grouped bar chart of the distribution of `category` for Marvel and DC, with
# one pair of bar-traces per year and a slider to pick the year.
fig = go.Figure()
years = range(1930, 2020)

# One row per (character, year): explode the list of years, drop rows without
# a year and keep a single row per (year, URL) pair.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
# Remove the 2020 rows with a boolean mask. `explode` repeats the original
# index label on every row of a character, so the previous label-based
# `drop(index=...)` also removed all other years of characters seen in 2020.
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_explode[dc_explode['years'] != 2020]

# Rows whose category is the literal 'Unknown' are left out of the statistics.
_marvel_known = marvel_explode[marvel_explode[category] != 'Unknown']
_dc_known = dc_explode[dc_explode[category] != 'Unknown']

# (trace name, tag shown in the labels, data, bar colour); Marvel first, so
# that the traces of year number i are fig.data[2*i] and fig.data[2*i + 1].
_publishers = (
    ('Marvel', 'Marvel', _marvel_known, "#990000"),
    ('DC Comic', 'DC', _dc_known, "#0F4C81"),
)
for year in years:
    for _trace_name, _tag, _known, _bar_color in _publishers:
        _values = _known.loc[_known['years'] == year, category]
        _counts = _values.value_counts()
        _shares = _values.value_counts(normalize=True)
        # `.1%` is the d3-format spelling of "percentage with one decimal";
        # the former `%.1f` is printf syntax, which d3-format does not accept.
        fig.add_bar(name=_trace_name, x=_shares.index, y=_shares,
                    text=_counts,
                    texttemplate='<b>' + _tag + ':</b> <br>%{label}: <br>%{text:s} <br>(%{y:.1%})',
                    textposition='auto', textangle=0,
                    hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>' + _tag + ':</b> <br><i>%{x}</i>: %{text:s} (%{y:.1%})<extra></extra>',
                    hovertext=_counts, meta=[category, year],
                    visible=False, marker=dict(color=_bar_color))

# Open on the last year of the range (derived from `years`, not hard-coded).
_last_step = len(years) - 1
fig.data[2 * _last_step].visible = True
fig.data[2 * _last_step + 1].visible = True

# Slider: one step per year, showing only that year's two traces.
_n_traces = len(fig.data)
steps = []
for i, year in enumerate(years):
    _mask = [False] * _n_traces
    _mask[2 * i] = True
    _mask[2 * i + 1] = True
    steps.append(dict(method="restyle", args=["visible", _mask], label=str(year)))
sliders = [dict(
    active=_last_step,
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]
fig.update_layout(sliders=sliders)

fig.update_layout(barmode='group',
                  title=category,
                  xaxis_title="Categories",
                  yaxis_type="log",
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))

# Axis definitions for the lin/log buttons. Only lin1/log1 are used by this
# single-panel figure; lin2/log2 are still assigned so the notebook-global
# names keep the same values as before.
lin1 = go.layout.YAxis(visible=True, type='linear')
lin2 = go.layout.YAxis(visible=True, type='linear', anchor='free', position=0.55)
log1 = go.layout.YAxis(visible=True, type='log')
log2 = go.layout.YAxis(visible=True, type='log', anchor='free', position=0.55)

# Y-axis title, drawn as a rotated annotation left of the plot.
ylabel = [go.layout.Annotation(x=-0.07,
                               y=0.5,
                               showarrow=False,
                               text="Proportion",
                               font=dict(size=12),
                               textangle=-90,
                               xref="paper",
                               yref="paper")]
fig.update_layout(annotations=ylabel)

# Lin/log menu; the figure starts in log scale, hence active=1.
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            active=1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                            'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                            'annotations': ylabel}]),
            ])
    ])
fig.show()

# Export the figure.
# Optional Chart Studio upload, currently disabled. Keep credentials out of
# the notebook: the API key that used to be written here has been replaced by
# a placeholder.
#chart_studio.tools.set_credentials_file(username='<username>', api_key='<api key>')
#url = py.plot(fig, filename=category+'_slide.html', auto_open=False,)
# Pass the printed url to tls.get_embed to obtain the snippet for the website.
#tls.get_embed(str(url)) #in '' put the printed url
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
def _characters_active_in(df, year):
    '''Return one row per character (unique URL) of df that appears in the given year.'''
    exploded = df.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
    return exploded[exploded['years'] == year]


def _rank_known_categories(values):
    '''Rank the categories of a column by frequency (0 = most frequent), ignoring 'Unknown'.'''
    counts = values.value_counts()
    if 'Unknown' in counts.index:
        counts = counts.drop('Unknown')
    # Build the ranks directly from the ordered index: this does not depend on the
    # column/index names produced by value_counts(), which changed in pandas 2.0.
    return pd.Series(range(len(counts)), index=counts.index)


def top_characteristics(dc_df, marvel_df, characteristic = '', top=10, year=False):
    '''
    Return the list of the top categories of a characteristic shared by DC and Marvel.

    Each category gets a score equal to the sum of its frequency ranks in the two
    universes (rank 0 = most frequent, 'Unknown' excluded); only categories
    present in both universes are kept, and the lowest scores come first.

    dc_df: DC dataframe
    marvel_df: Marvel dataframe
    characteristic: column to analyse, e.g. ['Citizenship', 'Marital Status', 'Occupation', 'Education', 'Gender','Eyes', 'Hair', 'Place of Birth']
    top: maximum number of categories to return
    year: if truthy, only characters appearing that year are counted
          (requires the 'years' and 'URL' columns)

    Returns a list of category labels, best score first (ties keep Marvel's order).
    '''
    if year:
        marvel_df = _characters_active_in(marvel_df, year)
        dc_df = _characters_active_in(dc_df, year)

    marvel_rank = _rank_known_categories(marvel_df[characteristic])
    dc_rank = _rank_known_categories(dc_df[characteristic])

    # Inner join keeps only the categories known in both universes,
    # in the order of the calling (Marvel) frame
    ranks = marvel_rank.to_frame('Scores_Marvel').join(dc_rank.to_frame('Scores_DC'), how='inner')
    scores = ranks['Scores_Marvel'] + ranks['Scores_DC']

    # Stable sort so that tied categories come out in a deterministic order
    scores = scores.sort_values(ascending=True, kind='mergesort')
    return scores.index[:top].tolist()
# top_characteristics() is slow, so its results are cached in the dictionary
# `top_label`, keyed by (characteristic, year). Resolution order:
#   1. reuse the dictionary already in memory,
#   2. otherwise load it from the pickle file,
#   3. otherwise rebuild it (slow) and save it for the next run.
try:
    # Probe with the same (characteristic, year) key order used when filling the cache
    top_label[('Gender', 2000)]
except (NameError, KeyError):
    try:
        top_label = pd.read_pickle("data_pickle/top_characteristic.pkl")
    except (OSError, EOFError, pickle.UnpicklingError):
        # Cache file missing or unreadable: recompute every (characteristic, year) pair
        category = {0:'Gender',
                    1:'Marital Status',
                    2:'Citizenship',
                    3:'Occupation',
                    4:'Education',
                    5:'First_apparition',
                    6:'Height in float',
                    7:'Weight in float',
                    8:'Eyes',
                    9:'Hair',
                    10:'Behavior'}
        years = range(1930,2021)
        top_label = {}
        for year in years:
            for characteristic in category.values():
                top_label[(characteristic, year)] = top_characteristics(
                    dc_pers, marvel_pers, characteristic=characteristic, top=6, year=year)
        # The context manager closes the file even if pickling fails
        with open("data_pickle/top_characteristic.pkl", "wb") as f:
            pickle.dump(top_label, f)
[top_label[('Citizenship',1939)]]
from scipy.stats import norm  # import kept from the original Height cell


def explode_by_year(characters, excluded_year=2020):
    '''
    Return one row per (character URL, year of appearance).

    characters: dataframe with a 'years' column (list of years) and a 'URL' column
    excluded_year: year removed from the result (2020 by default)
    '''
    exploded = (characters.explode('years')
                .dropna(subset=['years'])
                .drop_duplicates(subset=['years', 'URL']))
    # explode() repeats a character's index label on each of its year rows, so
    # dropping by index label would delete *every* year of any character seen
    # in the excluded year. Filter on the value instead.
    return exploded[exploded['years'] != excluded_year]


def yearly_distribution(exploded, category, year, labels=None):
    '''
    Return (shares, counts) of the known values of `category` for one year.

    exploded: dataframe produced by explode_by_year
    labels: if given, only these categories are returned, in this order
            (a label with no character that year gets 0); otherwise all
            values are returned sorted by value.
    '''
    known = exploded.loc[(exploded[category] != 'Unknown') & (exploded['years'] == year), category]
    counts = known.value_counts()
    shares = counts / counts.sum()  # same values as value_counts(normalize=True)
    if labels is None:
        return shares.sort_index(), counts.sort_index()
    return shares.reindex(labels, fill_value=0), counts.reindex(labels, fill_value=0)


def yearly_slider_figure(marvel_by_year, dc_by_year, category, years, title=None,
                         xaxis_title="Categories", labels_by_year=None,
                         trace_type='bar', yaxis_type='log', xaxis_range=None):
    '''
    Build the Marvel vs DC figure of a characteristic with a year slider.

    marvel_by_year, dc_by_year: dataframes produced by explode_by_year
    category: column to plot
    years: years selectable with the slider; the last one is shown first
    title: figure title, also shown in the hover box (defaults to category)
    xaxis_title: label of the x axis
    labels_by_year: optional dictionary {(category, year): [labels]} restricting
                    and ordering the categories shown each year
    trace_type: 'bar' for bars with text labels, anything else for scatter markers
    yaxis_type: initial scale of the y axis, 'log' or 'linear'
    xaxis_range: optional fixed [min, max] range of the x axis

    Returns the plotly figure. Raises ValueError if years is empty.
    '''
    years = list(years)
    if not years:
        raise ValueError("years must contain at least one year")
    if title is None:
        title = category

    # (legend name, tag used in the templates, colour, data) for each publisher
    publishers = (('Marvel', 'Marvel', "#990000", marvel_by_year),
                  ('DC Comic', 'DC', "#0F4C81", dc_by_year))
    # Name of the per-point variable holding the count in the hover box
    count_variable = 'text' if trace_type == 'bar' else 'hovertext'

    fig = go.Figure()
    for year in years:
        labels = None if labels_by_year is None else labels_by_year[(category, year)]
        for trace_name, tag, color, exploded in publishers:
            shares, counts = yearly_distribution(exploded, category, year, labels)
            # Templates use d3-format: '.1%' prints a proportion as a one-decimal
            # percentage; the count is printed as-is (d3's 's' type means SI prefix).
            common = dict(name=trace_name, x=shares.index, y=shares,
                          hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>' + tag
                                        + ':</b> <br><i>%{x}</i>: %{' + count_variable
                                        + '} (%{y:.1%})<extra></extra>',
                          hovertext=counts, meta=[title, year],
                          visible=False, marker=dict(color=color))
            if trace_type == 'bar':
                fig.add_bar(text=counts,
                            texttemplate='<b>' + tag + ':</b> <br>%{label}: <br>%{text} <br>(%{y:.1%})',
                            textposition='auto', textangle=0, **common)
            else:
                fig.add_scatter(mode='markers', **common)

    # Show the most recent year first: it is the last pair of traces added above
    fig.data[-2].visible = True
    fig.data[-1].visible = True

    # Slider: each step shows exactly the two traces (Marvel, DC) of one year
    steps = []
    for position, year in enumerate(years):
        visibility = [False] * len(fig.data)
        visibility[2 * position] = True
        visibility[2 * position + 1] = True
        steps.append(dict(method="restyle",
                          args=["visible", visibility],
                          label=str(year)))
    sliders = [dict(active=len(steps) - 1,  # matches the traces made visible above
                    currentvalue={"prefix": "Year: "},
                    pad={"t": 20},
                    steps=steps)]

    # Axis definitions used by the lin/log scale buttons
    lin1 = go.layout.YAxis(visible=True, type='linear')
    log1 = go.layout.YAxis(visible=True, type='log')
    # y label, drawn as an annotation so that it survives the axis relayout
    ylabel = [go.layout.Annotation(x=-0.07,
                                   y=0.5,
                                   showarrow=False,
                                   text="Proportion",
                                   font=dict(size=12),
                                   textangle=-90,
                                   xref="paper",
                                   yref="paper")]

    layout = dict(sliders=sliders,
                  barmode='group',
                  title=title,
                  xaxis_title=xaxis_title,
                  yaxis_type=yaxis_type,
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"),
                  annotations=ylabel,
                  updatemenus=[
                      go.layout.Updatemenu(
                          # pre-select the button matching the initial scale
                          active=1 if yaxis_type == 'log' else 0,
                          x=1,
                          y=1.2,
                          pad={"r": 10, "t": 10},
                          xanchor='right',
                          yanchor='top',
                          buttons=[
                              dict(label='Lin-scale',
                                   method='relayout',
                                   args=[{'yaxis': lin1,
                                          'annotations': ylabel}]),
                              dict(label='Log-scale',
                                   method='relayout',
                                   args=[{'yaxis': log1,
                                          'annotations': ylabel}])
                          ])
                  ])
    if xaxis_range is not None:
        layout['xaxis_range'] = xaxis_range
    fig.update_layout(**layout)
    return fig


def show_and_copy(fig):
    '''
    Display the figure, copy it to the clipboard as an HTML <div> and return that <div>.

    To publish through Chart Studio instead, read the credentials from the
    environment before calling chart_studio.tools.set_credentials_file(); never
    commit an API key to the notebook (the key previously written here should be revoked).
    '''
    fig.show()
    div = str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div'))
    pyperclip.copy(div)
    return div


# Data shared by all the slider plots: one row per (character, year), 1930-2019
years = range(1930,2020)
marvel_explode = explode_by_year(marvel_pers)
dc_explode = explode_by_year(dc_pers)
# HTML <div> of every plot, by title (the clipboard only keeps the last one)
slider_divs = {}

category = 'Citizenship'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, labels_by_year=top_label)
slider_divs[category] = show_and_copy(fig)

category = 'Occupation'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, labels_by_year=top_label)
slider_divs[category] = show_and_copy(fig)

category = 'Education'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, labels_by_year=top_label)
slider_divs[category] = show_and_copy(fig)

category = 'First_apparition'
name= 'First appearance'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, title=name,
                           xaxis_title="Year of creation of the character")
slider_divs[name] = show_and_copy(fig)

category = 'Height in float'
name= 'Height'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, title=name,
                           xaxis_title="Height [cm]", xaxis_range=[0,400],
                           trace_type='scatter', yaxis_type="linear")
slider_divs[name] = show_and_copy(fig)

category = 'Weight in float'
name= 'Weight'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, title=name,
                           xaxis_title="Weight [kg]", xaxis_range=[0,400],
                           trace_type='scatter', yaxis_type="linear")
slider_divs[name] = show_and_copy(fig)

category = 'Eyes'
fig = yearly_slider_figure(marvel_explode, dc_explode, category, years, labels_by_year=top_label)
slider_divs[category] = show_and_copy(fig)
category = 'Hair'
years = range(1930, 2020)
# One Marvel bar trace and one DC bar trace are added per year; a slider
# then selects which pair is displayed.
fig = go.Figure()

# One row per (character, year).  explode() repeats the original index label
# on every row it produces, so the 2020 rows are removed with a boolean mask:
# dropping them by index label would also discard every other year of any
# character that appears in 2020.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
dc_explode = dc_explode[dc_explode['years'] != 2020]

# NOTE(review): the templates below format y with '%.1f'; Plotly templates
# take d3-format specifiers (e.g. '.1%'), so confirm the proportion renders
# as intended.
for year in years:
    # Labels to display for this category/year (top_label is defined outside
    # this cell).
    labels = top_label[(category, year)]
    # Values of this year, ignoring rows whose value is 'Unknown'.
    marvel_known = marvel_explode.loc[
        (marvel_explode[category] != 'Unknown') & (marvel_explode['years'] == year), category]
    dc_known = dc_explode.loc[
        (dc_explode[category] != 'Unknown') & (dc_explode['years'] == year), category]
    # Proportions drive the bar heights, raw counts go into the bar text.
    hist_marvel = marvel_known.value_counts(normalize=True)[labels]
    hist_dc = dc_known.value_counts(normalize=True)[labels]
    count_marvel = marvel_known.value_counts()[labels]
    count_dc = dc_known.value_counts()[labels]
    fig.add_bar(name='Marvel', x=hist_marvel.index, y=hist_marvel,
                text=count_marvel, texttemplate='<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta=[category, year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=hist_dc.index, y=hist_dc,
                text=count_dc, texttemplate='<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category, year],
                visible=False, marker=dict(color="#0F4C81"))

# Show the most recent year's pair of traces when the figure opens.
fig.data[-2].visible = True
fig.data[-1].visible = True

# Slider: one step per year; each step shows only that year's two traces
# (Marvel at the even position, DC right after it).
n_traces = len(fig.data)
steps = []
for i, step_year in enumerate(years):
    shown = [False] * n_traces
    shown[2 * i] = True
    shown[2 * i + 1] = True
    steps.append(dict(method="restyle",
                      args=["visible", shown],
                      label=str(step_year)))
sliders = [dict(
    active=len(steps) - 1,  # matches the pair made visible above
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

# Axis definitions swapped in by the lin/log buttons.
lin1 = go.layout.YAxis(visible=True, type='linear')
log1 = go.layout.YAxis(visible=True, type='log')

# The y-axis title is drawn as a rotated paper-referenced annotation.
ylabel = [go.layout.Annotation(x=-0.07,
                               y=0.5,
                               showarrow=False,
                               text="Proportion",
                               font=dict(size=12),
                               textangle=-90,
                               xref="paper",
                               yref="paper")]

fig.update_layout(
    sliders=sliders,
    barmode='group',
    title=category,
    xaxis_title="Categories",
    yaxis_type="log",
    font=dict(family='Komika Hand', size=11, color="#7f7f7f"),
    annotations=ylabel,
    updatemenus=[
        # lin/log scale selector; index 1 (log) matches yaxis_type above
        go.layout.Updatemenu(
            active=1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1, 'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1, 'annotations': ylabel}]),
            ])
    ])
fig.show()

# Export: render the figure as an HTML <div> (plotly.js not bundled) and copy
# it to the clipboard so it can be pasted into the website.  Publishing via
# Chart Studio (py.plot + tls.get_embed) is the alternative route; keep its
# credentials out of the notebook.
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
category = 'Behavior'
years = range(1930, 2020)
# One Marvel bar trace and one DC bar trace are added per year; a slider
# then selects which pair is displayed.
fig = go.Figure()

# One row per (character, year).  explode() repeats the original index label
# on every row it produces, so the 2020 rows are removed with a boolean mask:
# dropping them by index label would also discard every other year of any
# character that appears in 2020.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
dc_explode = dc_explode[dc_explode['years'] != 2020]

# Fixed bar order applied to the years after 1940.
behavior_order = ['Good', 'Neutral', 'Bad']

# NOTE(review): the templates below format y with '%.1f'; Plotly templates
# take d3-format specifiers (e.g. '.1%'), so confirm the proportion renders
# as intended.
for year in years:
    # Values of this year, ignoring rows whose value is 'Unknown'.
    marvel_known = marvel_explode.loc[
        (marvel_explode[category] != 'Unknown') & (marvel_explode['years'] == year), category]
    dc_known = dc_explode.loc[
        (dc_explode[category] != 'Unknown') & (dc_explode['years'] == year), category]
    # Proportions drive the bar heights, raw counts go into the bar text.
    hist_marvel = marvel_known.value_counts(normalize=True)
    hist_dc = dc_known.value_counts(normalize=True)
    count_marvel = marvel_known.value_counts()
    count_dc = dc_known.value_counts()
    if year > 1940:
        # Up to 1940 the counts are left in value_counts order; selecting
        # the three labels would raise if one of them were absent.
        hist_marvel = hist_marvel[behavior_order]
        hist_dc = hist_dc[behavior_order]
        count_marvel = count_marvel[behavior_order]
        count_dc = count_dc[behavior_order]
    fig.add_bar(name='Marvel', x=hist_marvel.index, y=hist_marvel,
                text=count_marvel, texttemplate='<b>Marvel:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>Marvel:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_marvel, meta=[category, year],
                visible=False, marker=dict(color="#990000"))
    fig.add_bar(name='DC Comic', x=hist_dc.index, y=hist_dc,
                text=count_dc, texttemplate='<b>DC:</b> <br>%{label}: <br>%{text:s} <br>(%{y:%.1f})', textposition='auto', textangle=0,
                hovertemplate='<b>%{meta[0]}<br>%{meta[1]}</b> <br><br><b>DC:</b> <br><i>%{x}</i>: %{text:s} (%{y:%.1f})<extra></extra>', hovertext=count_dc, meta=[category, year],
                visible=False, marker=dict(color="#0F4C81"))

# Show the most recent year's pair of traces when the figure opens.
fig.data[-2].visible = True
fig.data[-1].visible = True

# Slider: one step per year; each step shows only that year's two traces
# (Marvel at the even position, DC right after it).
n_traces = len(fig.data)
steps = []
for i, step_year in enumerate(years):
    shown = [False] * n_traces
    shown[2 * i] = True
    shown[2 * i + 1] = True
    steps.append(dict(method="restyle",
                      args=["visible", shown],
                      label=str(step_year)))
sliders = [dict(
    active=len(steps) - 1,  # matches the pair made visible above
    currentvalue={"prefix": "Year: "},
    pad={"t": 20},
    steps=steps,
)]

# Axis definitions swapped in by the lin/log buttons.
lin1 = go.layout.YAxis(visible=True, type='linear')
log1 = go.layout.YAxis(visible=True, type='log')

# The y-axis title is drawn as a rotated paper-referenced annotation.
ylabel = [go.layout.Annotation(x=-0.07,
                               y=0.5,
                               showarrow=False,
                               text="Proportion",
                               font=dict(size=12),
                               textangle=-90,
                               xref="paper",
                               yref="paper")]

fig.update_layout(
    sliders=sliders,
    barmode='group',
    title=category,
    xaxis_title="Categories",
    yaxis_type="log",
    font=dict(family='Komika Hand', size=11, color="#7f7f7f"),
    annotations=ylabel,
    updatemenus=[
        # lin/log scale selector; index 1 (log) matches yaxis_type above
        go.layout.Updatemenu(
            active=1,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1, 'annotations': ylabel}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1, 'annotations': ylabel}]),
            ])
    ])
fig.show()

# Export: render the figure as an HTML <div> (plotly.js not bundled) and copy
# it to the clipboard so it can be pasted into the website.  Publishing via
# Chart Studio (py.plot + tls.get_embed) is the alternative route; keep its
# credentials out of the notebook.
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
# Column read for each selectable entry, and the label shown on its button.
category = {0: 'First_apparition',
            1: 'Marital Status',
            2: 'Citizenship',
            3: 'Occupation',
            4: 'Education',
            5: 'Gender',
            6: 'Height in float',
            7: 'Weight in float',
            8: 'Eyes',
            9: 'Hair',
            10: 'Behavior'}
label = {0: 'New character',
         1: 'Marital Status',
         2: 'Citizenship',
         3: 'Occupation',
         4: 'Education',
         5: 'Gender',
         6: 'Height in float',
         7: 'Weight in float',
         8: 'Eyes',
         9: 'Hair',
         10: 'Behavior'}
# Number of entries that get a pair of traces and a button.
# NOTE(review): with 10, key 10 ('Behavior') is never drawn -- confirm that
# this is intended.
n_plotted = 10

MARVEL_RED = "#990000"
DC_BLUE = "#0F4C81"


def _callout(x, y, text, color, ay):
    """Build an arrow annotation pointing at data point (x, y)."""
    return go.layout.Annotation(x=x, y=y, xref="x", yref="y", text=text,
                                font=dict(family="Komika Hand", size=15, color=color),
                                showarrow=True, arrowhead=7, ax=0, ay=ay)


def _distinct_per_year(frame, column):
    """Count, for each year, the distinct non-null values of *column*."""
    per_year = frame.groupby('years')[column].value_counts()
    return per_year.unstack(level=1).count(axis=1).sort_index()


def _split_citizenship(frame):
    """Return a copy of *frame* with one row per (character, year, citizenship).

    The comma-separated 'Citizenship' string is split on a local copy so the
    caller's frame is left untouched.
    """
    split = frame.assign(Citizenship=frame['Citizenship'].apply(lambda s: s.split(',')))
    return (split.explode('Citizenship')
                 .dropna(subset=['Citizenship'])
                 .drop_duplicates(subset=['years', 'URL', 'Citizenship']))


fig = go.Figure()

# One row per (character, year).  explode() repeats the original index label
# on every row it produces, so the 2020 rows are removed with a boolean mask:
# dropping them by index label would also discard every other year of any
# character that appears in 2020.
marvel_explode = marvel_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
marvel_explode = marvel_explode[marvel_explode['years'] != 2020]
dc_explode = dc_pers.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
dc_explode = dc_explode[dc_explode['years'] != 2020]

visible = {}
buttons = []

# The y-axis title is drawn as a rotated paper-referenced annotation.
ylabel = [go.layout.Annotation(x=-0.07,
                               y=0.5,
                               showarrow=False,
                               text="Number of categories",
                               font=dict(size=13),
                               textangle=-90,
                               xref="paper",
                               yref="paper")]

# Call-outs shown together with specific entries.  Their positions are
# hard-coded data coordinates.
# NOTE(review): re-check the y values against the regenerated series.
new_charac = [_callout(1938, 39, "Super-Man", DC_BLUE, -100),
              _callout(1941, 360, "Captain America", MARVEL_RED, -40),
              _callout(1962, 201, "Spider-Man", MARVEL_RED, -40),
              _callout(1963, 221, "Iron-Man\n Avengers", MARVEL_RED, -60),
              _callout(1974, 455, "Wolverine", MARVEL_RED, -40),
              _callout(1939, 40, "Batman", DC_BLUE, -80),
              _callout(1991, 587, "Deadpool", MARVEL_RED, -80),
              _callout(1973, 435, "Thanos", MARVEL_RED, -100),
              _callout(1961, 115, "Atom", DC_BLUE, -20),
              _callout(1980, 368, "She-Hulk", MARVEL_RED, -20),
              _callout(1959, 64, "Super Girl", DC_BLUE, -20),
              _callout(1998, 395, "Spider-Girl", MARVEL_RED, -20)]
new_citiz = [_callout(1968, 82, "First Swiss", MARVEL_RED, -40)]
new_gender = [_callout(2002, 5, "First Transgender", "white", 120),
              _callout(1949, 3, "First Genderfluid / Loki", MARVEL_RED, -60)]
# Entries without a key here only show the y label.
extra_annotations = {'First_apparition': new_charac,
                     'Citizenship': new_citiz,
                     'Gender': new_gender}

for i in range(n_plotted):
    column = category[i]
    if i == 0:
        # Number of characters whose first appearance falls in each year.
        # 2020 is left out for both publishers; errors='ignore' keeps this
        # from raising when no character debuts in 2020.
        marvel_line = marvel_pers['First_apparition'].value_counts().drop(index=2020, errors='ignore').sort_index()
        dc_line = dc_pers['First_apparition'].value_counts().drop(index=2020, errors='ignore').sort_index()
    elif column == 'Citizenship':
        # A character can hold several comma-separated citizenships.
        marvel_line = _distinct_per_year(_split_citizenship(marvel_explode), column)
        dc_line = _distinct_per_year(_split_citizenship(dc_explode), column)
    else:
        marvel_line = _distinct_per_year(marvel_explode, column)
        dc_line = _distinct_per_year(dc_explode, column)

    for trace_name, series, color in (('Marvel', marvel_line, MARVEL_RED),
                                      ('DC Comic', dc_line, DC_BLUE)):
        fig.add_trace(go.Scatter(x=series.index, y=series, mode='lines', name=trace_name,
                                 line=dict(color=color, width=3), visible=False, fill='tozeroy'))

    # Button i shows only traces 2*i (Marvel) and 2*i+1 (DC).
    shown = [False] * (2 * n_plotted)
    shown[2 * i] = True
    shown[2 * i + 1] = True
    visible[i] = shown
    buttons.append(dict(label=label[i],
                        method="update",
                        args=[{"visible": visible[i]},
                              {"title": 'Diversity analysis',
                               'font': dict(family='Komika Hand',
                                            size=11,
                                            color="#7f7f7f"),
                               "annotations": ylabel + extra_annotations.get(column, [])}]))

# Initialize the axis
fig.update_xaxes(title_text="Year")

# Initialize title and font
fig.update_layout(barmode='group',
                  title=dict(text="Diversity analysis",
                             font={'family': 'Komika Hand',
                                   'color': '#7f7f7f',
                                   'size': 20},
                             x=0,
                             xanchor='left',
                             y=0.95,
                             yanchor='top'),
                  font=dict(family='Komika Hand',
                            size=11,
                            color="#7f7f7f"))

# Make the first pair of traces visible
for i in range(2):
    fig.data[i].visible = True

# Axis definitions swapped in by the lin/log buttons.
lin1 = go.layout.YAxis(visible=True, type='linear')
lin2 = go.layout.YAxis(visible=True, type='linear', anchor='free', position=0.55)
log1 = go.layout.YAxis(visible=True, type='log')
log2 = go.layout.YAxis(visible=True, type='log', anchor='free', position=0.55)

# Annotations matching the initially selected entry (button 0).
fig.update_layout(annotations=ylabel + new_charac)

fig.update_layout(
    updatemenus=[
        # category selector
        go.layout.Updatemenu(
            active=0,
            pad={"r": 10, "t": 10},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons),
        ),
        # lin/log scale selector
        go.layout.Updatemenu(
            active=0,
            x=1,
            y=1.2,
            pad={"r": 10, "t": 10},
            xanchor='right',
            yanchor='top',
            buttons=[
                dict(label='Lin-scale',
                     method='relayout',
                     args=[{'yaxis': lin1,
                            'yaxis2': lin2}]),
                dict(label='Log-scale',
                     method='relayout',
                     args=[{'yaxis': log1,
                            'yaxis2': log2}]),
            ])
    ])
fig.show()

# Export: render the figure as an HTML <div> (plotly.js not bundled) and copy
# it to the clipboard.  Publishing via Chart Studio (py.plot with filename
# 'Diversity.html', then tls.get_embed) is the alternative route; keep its
# credentials out of the notebook.
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
Merge the Marvel and DC dataframes, each reduced by alias, into a single dataframe
# Tag every row with its publisher so the origin survives the concatenation.
for publisher_frame, publisher in ((marvel_alias, 'Marvel'), (dc_alias, 'DC')):
    publisher_frame['Comic'] = publisher

# Columns carried over into the merged frame.
attribute = [
    'URL', 'Real Name', 'Current Alias', 'Comic', 'Identity', 'Citizenship', 'Marital Status',
    'Occupation', 'Education', 'Gender', 'Height in float', 'Weight in float', 'Eyes', 'Hair',
    'Place of Birth', 'Behavior',
    'Number_of_apparitions', 'years', 'First_apparition', 'Longevity', 'Score appearance',
    'Score longevity', 'Score Famous',
]

# Stack the Marvel rows on top of the DC rows.
pers_alias = pd.concat([marvel_alias[attribute], dc_alias[attribute]], axis=0)

# Shorter, display-friendly column names.
display_names = {
    'Height in float': 'Height',
    'Weight in float': 'Weight',
    'Number_of_apparitions': 'Nb appearance',
    'years': 'Years',
    'First_apparition': 'First appearance',
}
pers_alias.rename(columns=display_names, inplace=True)
def add_epoch(first):
    """Map a first-appearance year to an epoch number.

    Args:
        first: year of first appearance (int or float).

    Returns:
        1 for years before 1960, 2 for 1960-1979, 3 for 1980-1999 and 4
        otherwise.  "Otherwise" covers 2000 onwards as well as any value for
        which every comparison is false, such as NaN.
    """
    # Each guard only runs when the previous ones failed, so the lower
    # bounds are implied and need no explicit check.
    if first < 1960.:
        return 1
    if first < 1980.:
        return 2
    if first < 2000.:
        return 3
    return 4
pers_alias['Epoch'] = pers_alias['First appearance'].apply(add_epoch)

# Histogram of the famousness score, for Marvel, DC and both together, with
# three shaded tiers: Forgotten (<=33), Intermediate (33-66], Famous (>66).
# Pre-binned bar traces are used because go.Histogram on the raw scores was
# found to produce a figure too large for plotly.
fig = go.Figure()


def _score_histogram(scores, bins=101):
    """Bin *scores* and return one row per bin.

    Columns: 'index' (the bin interval), 'Score Famous' (number of scores in
    the bin) and 'mid' (bin centre).  The frame is built explicitly because
    the column names produced by value_counts().reset_index() differ between
    pandas versions.
    """
    counts = scores.value_counts(bins=bins)
    return pd.DataFrame({
        'index': counts.index,
        'Score Famous': counts.to_numpy(),
        'mid': [(interval.left + interval.right) / 2 for interval in counts.index],
    })


def _tier_counts(scores):
    """Return (low, medium, high) counts for the 33 / 66 score thresholds."""
    low = int((scores <= 33).sum())
    medium = int(((scores > 33) & (scores <= 66)).sum())
    high = int((scores > 66).sum())
    return low, medium, high


def _tier_annotations(low, medium, high, total, color):
    """Build the three tier captions (count and share of *total*)."""
    tiers = ((16, 'Forgotten', low), (50, 'Intermediate', medium), (84, 'Famous', high))
    return [go.layout.Annotation(
                x=x_center, y=0.98, xref="x", yref="paper", yanchor='top', xanchor='center',
                text="<b> {} </b> <br> <i>Total: {:5d} ({:.0f}%) </i> ".format(title, count, count / total * 100),
                font=dict(family="Komika Hand", size=10, color=color), showarrow=False,
                arrowhead=7, ax=0, ay=0)
            for x_center, title, count in tiers]


marvel_scores = pers_alias.loc[pers_alias['Comic'] == 'Marvel', 'Score Famous']
dc_scores = pers_alias.loc[pers_alias['Comic'] == 'DC', 'Score Famous']

pers_score_marvel = _score_histogram(marvel_scores)
pers_low_marvel, pers_med_marvel, pers_high_marvel = _tier_counts(marvel_scores)
total_marvel = len(marvel_scores)

pers_score_dc = _score_histogram(dc_scores)
pers_low_dc, pers_med_dc, pers_high_dc = _tier_counts(dc_scores)
total_dc = len(dc_scores)

pers_score = _score_histogram(pers_alias['Score Famous'])
pers_high = pers_high_marvel + pers_high_dc
pers_med = pers_med_marvel + pers_med_dc
pers_low = pers_low_marvel + pers_low_dc
total = total_marvel + total_dc

# Trace order matters: the buttons below address the traces by position
# (0 = Marvel, 1 = DC Comics, 2 = both).
for trace_name, histogram, color, shown in (
        ('Marvel', pers_score_marvel, "#990000", False),
        ('DC Comics', pers_score_dc, "#0F4C81", False),
        ('Marvel + DC Comics', pers_score, "purple", True)):
    fig.add_bar(name=trace_name, x=histogram['mid'], y=histogram['Score Famous'], width=0.98,
                visible=shown, marker=dict(color=color),
                hovertemplate='<b>Score: %{x}</b> <br><br><b>' + trace_name + ':</b> %{y} <extra></extra>')

# Tier captions, one set per selectable view.
text_marvel = _tier_annotations(pers_low_marvel, pers_med_marvel, pers_high_marvel, total_marvel, "#990000")
text_total = _tier_annotations(pers_low, pers_med, pers_high, total, "purple")
text_dc = _tier_annotations(pers_low_dc, pers_med_dc, pers_high_dc, total_dc, "#0F4C81")

# Each button shows one trace and swaps in the matching captions.
buttons = [dict(label='Both Comics',
                method="update",
                args=[{"visible": [False, False, True]},
                      {"annotations": text_total}]),
           dict(label='Marvel',
                method="update",
                args=[{"visible": [True, False, False]},
                      {"annotations": text_marvel}]),
           dict(label='DC Comics',
                method="update",
                args=[{"visible": [False, True, False]},
                      {"annotations": text_dc}])]

fig.update_layout(
    bargap=0.1,
    title=dict(text="Repartition of the character in groups of celebrity",
               font={'family': 'Komika Hand',
                     'color': '#7f7f7f',
                     'size': 20}),
    xaxis_title="Famousness Score",
    yaxis_title="Number",
    yaxis_type="log",
    font=dict(family='Komika Hand',
              size=11,
              color="#7f7f7f")
)

# View selector (buttons stacked vertically).
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            type="buttons",
            direction='down',
            borderwidth=0.5,
            active=0,
            pad={"r": 10, "t": 10, "b": 20},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons)
        )
    ])

# Shaded score tiers: x is in data units, y spans the whole plot height.
fig.update_layout(
    shapes=[go.layout.Shape(type="rect",
                            xref="x",
                            yref="paper",
                            x0=x_start,
                            y0=0,
                            x1=x_end,
                            y1=1,
                            fillcolor=fill,
                            opacity=0.3,
                            layer="above",
                            line_width=0)
            for x_start, x_end, fill in ((0, 33, "black"), (33, 66, "grey"), (66, 100, "white"))]
)

# Captions for the view shown first (both publishers).
fig.update_layout(annotations=text_total)
fig.show()

# Optional upload to Chart Studio.  Credentials are read from the
# environment rather than being written in the notebook; the upload is
# skipped when they are not set.
# NOTE(review): the API key that used to be hard-coded here should be
# considered leaked and be revoked.
chart_studio_user = os.environ.get('CHART_STUDIO_USERNAME')
chart_studio_key = os.environ.get('CHART_STUDIO_API_KEY')
if chart_studio_user and chart_studio_key:
    chart_studio.tools.set_credentials_file(username=chart_studio_user, api_key=chart_studio_key)
    url = py.plot(fig, filename='repartition_famousness.html', auto_open=False)
    tls.get_embed(str(url))

# Export: render the figure as an HTML <div> (plotly.js not bundled) and copy
# it to the clipboard.
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
# TODO: if time permits, change Occupation and Education.
n_charac = 13
# Name shown in the legend and hover text of each selectable character.
name = {0: 'Superman',
        1: 'Captain America',
        2: 'Batman',
        3: 'Spider-Man',
        4: 'Iron Man',
        5: 'Green Lantern',
        6: 'Human Torch',
        7: 'Thor',
        8: 'The Flash',
        9: 'Mister Fantastic',
        10: 'Hulk',
        11: 'Wolverine',
        12: 'Wonder Woman'
        }
# 'Current Alias' value used to look the character up (also the button label).
alias = {0: 'Superman',
         1: 'Captain America',
         2: 'Batman',
         3: 'Spider-Man',
         4: 'Iron Man',
         5: 'Green Lantern',
         6: 'Human Torch',
         7: 'All-Father Thor',
         8: 'The Flash',
         9: 'Mister Fantastic',
         10: 'Hulk',
         11: 'Wolverine',
         12: 'Wonder Woman'
         }
# Lines of the info box: [displayed title, column of pers_alias].
info = {0: ['Alias', 'Current Alias'],
        1: ['Real name', 'Real Name'],
        2: ['First appearance', 'First appearance'],
        3: ['Gender', 'Gender'],
        4: ['Citizenship', 'Citizenship'],
        5: ['Place of Birth', 'Place of Birth'],
        6: ['Marital Status', 'Marital Status'],
        7: ['Height', 'Height'],
        8: ['Weight', 'Weight'],
        9: ['Color of Eyes', 'Eyes'],
        10: ['Color of Hair', 'Hair'],
        11: ['Score Appearance', 'Score appearance'],
        12: ['Score Longevity', 'Score longevity'],
        13: ['Score Famousness', 'Score Famous']
        }
# Number formatting per info line; lines not listed are printed as-is.
plain_line = "<b>{}</b>: {}<br>"
line_format = {2: "<b>{}</b>: {:.0f} <br>",
               7: "<b>{}</b>: {:.1f} cm <br>",
               8: "<b>{}</b>: {:.1f} kg <br>",
               11: "<b>{}</b>: {:.1f} <br>",
               12: "<b>{}</b>: {:.1f} <br>",
               13: "<b>{}</b>: {:.1f} <br>"}

fig = go.Figure()
buttons = []

for i in range(n_charac):
    # Several rows can share an alias; keep the one with the highest score.
    character = pers_alias[pers_alias['Current Alias'] == alias[i]].sort_values('Score Famous', ascending=False).iloc[0]
    text = ''.join(line_format.get(j, plain_line).format(info[j][0], character[info[j][1]])
                   for j in range(len(info)))
    # Number of entries per year in the character's 'Years' list.
    line = pd.Series(character['Years']).value_counts()
    # Button i shows trace i only.
    visible = [False] * n_charac
    visible[i] = True
    # Marvel characters are drawn in red, all others in DC blue.
    color = '#990000' if character['Comic'] == 'Marvel' else '#0F4C81'
    info_charac = [go.layout.Annotation(x=0.015, y=0.92, xref="paper", yref="paper", font=dict(family="Arial", size=10, color="white"),
                                        text=text, yanchor='top', xanchor='left', width=200, height=200,
                                        bgcolor=color, bordercolor='white', borderwidth=2
                                        )]
    fig.add_bar(name=name[i], x=line.index, y=line,
                hovertemplate='<b>%{meta}</b> <br><br>Nb appearance in %{x}: %{y}<extra></extra>', meta=name[i],
                visible=False, marker=dict(color=color))
    buttons.append(dict(label=alias[i],
                        method="update",
                        args=[{"visible": visible},
                              {"annotations": info_charac}]))
    if i == 0:
        # Info box of the character displayed when the figure opens.
        fig.update_layout(annotations=info_charac)

# Title, labels...
fig.update_layout(
    bargap=0,
    title=dict(text="Character:",
               font={'family': 'Komika Hand',
                     'color': '#7f7f7f',
                     'size': 20}),
    xaxis_title="Year",
    yaxis_title="Nb of apparition",
    font=dict(family='Komika Hand',
              size=11,
              color="#7f7f7f")
)

# Character selector (buttons stacked vertically).
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            type="buttons",
            direction='down',
            borderwidth=0.5,
            active=0,
            pad={"r": 10, "t": 10, "b": 20},
            x=-0.32,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=list(buttons)
        )
    ])

# Make the first trace visible
fig.data[0].visible = True
# Same axis ranges for every character so the bars stay comparable.
fig.update_xaxes(range=[1930, 2020])
fig.update_yaxes(range=[0, 500])
fig.show()

# Export: render the figure as an HTML <div> (plotly.js not bundled) and copy
# it to the clipboard.  Publishing via Chart Studio (py.plot with filename
# 'Hist_character.html', then tls.get_embed) is the alternative route; keep
# its credentials out of the notebook.
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
# Tier label derived from the famousness score, using the same thresholds as
# the tier counts and shaded regions of the histogram: > 66 Famous,
# (33, 66] Intermediate, everything else Forgotten.
pers_alias['Famous'] = pers_alias['Score Famous'].apply(
    lambda x: 'Famous' if x > 66 else ('Intermediate' if x > 33 else 'Forgotten'))

# Save one frame per publisher.
marvel_df = pers_alias[pers_alias['Comic'] == 'Marvel']
dc_df = pers_alias[pers_alias['Comic'] == 'DC']
# Context managers make sure both files are flushed and closed.
with open('data_pickle/marvel_longevity', 'wb') as marvel_file:
    pickle.dump(marvel_df, marvel_file)
with open('data_pickle/dc_longevity', 'wb') as dc_file:
    pickle.dump(dc_df, dc_file)